************************************************************
* Calibration plot (actual vs. predicted total hours)
************************************************************
clear all
set more off

// Install packages
ssc install grstyle, replace
ssc install palettes, replace
ssc install colrspace, replace

// Read in command line arguments
args covs roster output

* Load local parameter called covariates which is created using ***
include "`covs'"

* Load cross-sectional READI analysis sample data
insheet using "`roster'", comma clear

* Prepare data
destring t_payroll_total_hours_post_20, force replace
replace t_payroll_total_hours_post_20=. if treatment==0

gen pay_dose=t_payroll_total_hours_post_20
replace pay_dose=0 if treatment==1 & t_headcount_post_20==0

* Limit to treatment group members without missing participation information
keep if treatment==1 & !missing(pay_dose)

* LOO prediction
quietly reg pay_dose `strataFE' `baselines'
predict yhat, xb
predict lev, leverage

gen loo_yhat=yhat-(lev/(1-lev))*(pay_dose-yhat)

* Some observations have leverage==1; calculate LOO y-hat manually for them
levelsof cluster if lev==1
local manual_clusters=r(levels)
quietly foreach c in `manual_clusters' {
	reg pay_dose `strataFE' `baselines' if cluster!=`c'
	predict temp_yhat if cluster==`c'
	replace loo_yhat=temp_yhat if cluster==`c'
	drop temp_yhat
}

* Divide LOO y-hats into 10 bins
xtile loo_yhat_bin=loo_yhat, n(10)

* generate calibration plot
grstyle init
grstyle set plain, horizontal nogrid dotted noextend
grstyle linewidth medthin
grstyle set legend 6, nobox
grstyle set symbol
grstyle set symbolsize medsmall

graph set window fontface "Noto Serif"

preserve
gcollapse (mean) loo_yhat pay_dose, by(loo_yhat_bin) fast
tw (line loo_yhat loo_yhat, lcolor(black)) (scatter pay_dose loo_yhat, msymbol(circle) mcolor(edkblue)), xtitle("Average predicted hours participated", height(6)) ytitle("Average actual hours participated") legend(off)
graph export "`output'", replace
restore
